#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Dec 14 14:15:16 2017

@author: agan

每个Excel有多个sheet，sheet格式相同

"""

import xlrd
import pandas as pd

# path = '/home/agan/Documents/weiwei/第1周/Excel合并作业-呆鸟/1.xlsx'

def return_data(path):
    workbook = xlrd.open_workbook(path)  # 打开excel
    sheets_num = len(workbook.sheets())   # 读取sheets的数量，稍候用sheet的索引来访问
    data = []  # 建立空列表，稍候存储数据
    for x in range(0, sheets_num):  # 遍历所有sheets
        sheet = workbook.sheets()[x]   # .sheets()[x] 加上索引x取指定的sheet
        nrows = sheet.nrows    # 获得当前sheet的行数 
        for y in range(1, nrows):     # 遍历所有行，从1开始，不要标题行，若需要就得重0开始
            row_value = sheet.row_values(y)    # 获得行值，返回数据以列表形式存储
            data.append(row_value)     # 添加到data列表里
    return data

# a = return_data(path)
# print(a)

'''
[['苹果1', 1.0],
 ['苹果2', 2.0],
 ['苹果3', 3.0],
 ['苹果4', 4.0],
 ['苹果5', 5.0],
 ['苹果6', 6.0],
 ['苹果7', 7.0],
 ['苹果8', 8.0],
 ['苹果9', 9.0],
 ['苹果10', 10.0],
 ['苹果11', 11.0],
 ['苹果12', 12.0],
 ['苹果13', 13.0],
 ['苹果14', 14.0],
 ['苹果15', 15.0],
 ['苹果16', 16.0],
 ['苹果17', 17.0],
 ['苹果18', 18.0],
 ['苹果19', 19.0],
 ['苹果20', 20.0]]
 '''


if __name__ == '__main__':
    path_list = ['/home/agan/Documents/weiwei/第1周/Excel合并作业-呆鸟/1.xlsx',
                 '/home/agan/Documents/weiwei/第1周/Excel合并作业-呆鸟/2.xlsx']
    combine_data = []
    for path in path_list:
        path_data = return_data(path)
        combine_data.extend(path_data)
    f = pd.DataFrame(combine_data, index = None, columns = ['产品','数量'])
    f.to_excel("'/home/agan/Documents/weiwei/第1周/Excel合并作业-呆鸟/3.xlsx'",sheet_name = "合并", index = False)




'''
12.15
如果该路径下的excel文件太多，手动输入path_list很麻烦，可以借助 os模块和re模块，筛选指定路径下的excel文件.xlsx格式的
那么，代码修改如下
'''
import xlrd
import pandas as pd
import os
import re


def return_data(path):
    workbook = xlrd.open_workbook(path)
    sheets_num = len(workbook.sheets())
    data = []
    for x in range(0, sheets_num):
        table = workbook.sheets()[x]
        nrows = table.nrows
        for y in range(1, nrows):
            row_values = table.row_values(y)
            data.append(row_values)
    return data


if __name__ == '__main__':
    path = 'D:\\Agan\\learn_python\\源码\\第1周\\Excel合并作业-呆鸟'
    # 若想更智能的话，此处path变量可以用input函数，运行程序时，手动填写要遍历的文件夹

    path_list = os.listdir(path)     # 遍历文件夹下所有文件
    
    par = ".+?\.xlsx"   # 正则匹配 。xlsx文件
    for name in path_list:
        res = re.match(par, name)
        if res is None:
            path_list.remove(name)
    

    combine_data = []
    for name in path_list:
        path_name = path + "\\" + name
        path_data = return_data(path_name)
        combine_data.extend(path_data)
    f = pd.DataFrame(combine_data, index = None, columns = ['产品','数量'])
    f.to_excel("D:\\Agan\\learn_python\\源码\\第1周\\Excel合并作业-呆鸟\\3.xlsx",sheet_name = "合并", index = False)
    print("Done")